# -*- coding: utf-8 -*-
"""
Created on Sat Mar 27 10:46:58 2021

@author: Apple
"""

# 导包
import requests # 用以替代浏览器，发送Http请求，获取网站响应
from lxml import etree # 用来对爬取的数据进行提取和整理
from urllib import request
from bs4 import BeautifulSoup

# 构建函数，实现数据爬取
# Build the function that performs the scrape for one listing page.
def crow(i):
    """Scrape page *i* of the sci-fi listing and append the entries to douban.txt.

    i: 1-based page number. Each page is assumed to hold 20 entries, so the
    running number recorded for an entry is (i - 1) * 20 + position-on-page.

    Side effects: performs one HTTP GET, prints progress to stdout, and
    appends one "NO:<n>" line per article to douban.txt.
    """
    # Build the request and fetch the page, posing as a desktop browser so
    # the site serves the normal HTML listing.
    url = 'https://www.goodreads.cc/sci-fi/lastupdate_'+str(i)+'.html'
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36'}
    html = requests.get(url,headers=headers).text
    # BUG FIX: the original called BeautifulSoup(html.text, 'lxml') here, but
    # `html` is already a str, so `.text` raised AttributeError on every run;
    # the resulting soup object was never used, so the call is simply removed.

    # Parse the response with lxml and select every article on the page.
    tree = etree.HTML(html)
    articles = tree.xpath('//*[@id="diyax3"]/article')

    # Open the output file once and record every article.
    # BUG FIX: in the original, the print/write statements and the `a += 1`
    # counter increment sat OUTSIDE the for-loop, so only the last article
    # was ever recorded and the counter never advanced (and an empty page
    # caused an unbound-variable error). The per-entry work now lives
    # inside the loop, with enumerate supplying the position.
    with open('douban.txt','a',encoding='utf-8') as f:
        for a, article in enumerate(articles):
            # Use /text() so we get the link text, not lxml element reprs.
            data_title = article.xpath('header/h2/a/text()')
            data_author = article.xpath('p[1]/a/text()')

            # Print the entry currently being processed.
            print("No"+str((i-1)*20+a+1))
            print(data_title)

            # Persist the running number (the original wrote only this field).
            f.write("NO:"+str((i-1)*20+a+1)+'\n')

# Script entry point: scrape page 1 of the listing. Guarded so that merely
# importing this module does not fire a network request or write to disk.
if __name__ == '__main__':
    crow(1)
    

